# This script will create a stacked bar plot from a beta model of LDA clusters.
# 
# Usage:
#       Rscript betaBarPlot.R [inputfile] [outputfile]
# 
# Where [inputfile] is a tab-separated beta LDA model for Wikipedia user
# clusters, with one row for each of the 20 namespaces used.
# 
# Where [outputfile] is the path for a PNG file to save the output plot.
#
# The number of clusters is inferred by the number of columns in the model.
#
# The plot has no legend; create one separately and paste it in.
# There is a pre-made legend file in the Google Doc for these results that
# is consistent with the coloring that this file uses.

# ---- Command-line arguments ----
# Two positional arguments are required: the input model path and the output
# PNG path. Fail early with a usage message instead of letting NA paths reach
# read.table() / png(). Extra trailing arguments are ignored.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("Usage: Rscript betaBarPlot.R [inputfile] [outputfile]", call. = FALSE)
}

inputfile <- args[1]
outputfile <- args[2]

# ---- Plot constants ----
# Namespace names, in the row order the plot assumes. They are not drawn on
# the plot; they are used below to sanity-check the row count of the model.
namespaces <- c(
  "Main", "Talk", "User", "User Talk", "Project", "Project Talk",
  "File", "File Talk", "MediaWiki", "MediaWiki Talk",
  "Template", "Template Talk", "Help", "Help Talk",
  "Category", "Category Talk", "Portal", "Portal Talk",
  "Book", "Book Talk"
)

# One fill color per stacked segment (i.e. per row of the model matrix).
colors <- c(
  "red", "lightblue", "darkblue", "lightgreen", "darkgreen", "darkorange",
  "deeppink", "black", "yellow", "brown", "darkmagenta", "tan", "seashell2",
  "lightsalmon", "darkslategray", "aquamarine", "darkgoldenrod",
  "darkorchid4", "gray27", "gray64"
)

title <- "Namespace Edit Distribution By Archetype"
xlabel <- "Archetype"
# plotmath's paste() simply juxtaposes its arguments and has no `sep`
# argument, so none is passed here; `beta` renders as the Greek letter.
ylabel <- expression(paste("Parameter of Dirichlet Prior (", beta, ")"))

# ---- Load the model ----
# Quoting and comment characters are disabled so the file is read verbatim.
model <- read.table(
  file = inputfile, sep = "\t", quote = "", comment.char = ""
)
modelmatrix <- as.matrix(model)

# as.matrix() yields a character matrix if any column is non-numeric, which
# barplot() cannot draw; report that clearly instead of failing inside it.
if (is.character(modelmatrix)) {
  stop(
    "Input model contains non-numeric values: ", inputfile,
    call. = FALSE
  )
}

# barplot() silently recycles `col`, so a row-count mismatch would produce a
# plot whose colors no longer line up with the legend. Warn, but still plot.
if (nrow(modelmatrix) != length(namespaces)) {
  warning(
    "Expected ", length(namespaces), " rows (one per namespace) but found ",
    nrow(modelmatrix), "; segment colors may not match the legend.",
    call. = FALSE
  )
}

# Each column of the matrix is one cluster; label the bars 1..k.
clustersize <- ncol(modelmatrix)
clusters <- as.character(seq_len(clustersize))

# ---- Draw the stacked bar plot to the PNG device ----
png(outputfile)

barplot(
  modelmatrix,
  main = title, ylab = ylabel, xlab = xlabel,
  names.arg = clusters, col = colors
)

dev.off()
